import scrapy
from douban_spider.items import DoubanSpiderItem


class DoubanNewmovieSpider(scrapy.Spider):
    """Scrape the movies listed on Douban's "now playing" page for Guangzhou.

    ``parse`` reads the summary attributes (``data-title``, ``data-score``,
    ...) from each listing ``<li>`` and then follows the movie's detail link;
    ``parse_main`` completes the item from the detail page and emits it.
    """

    name = "douban_newmovie"
    allowed_domains = ["movie.douban.com"]
    start_urls = ["https://movie.douban.com/cinema/nowplaying/guangzhou/"]

    def parse(self, response):
        """Build a partial item per listed movie and request its detail page.

        Args:
            response: The response for the listing page in ``start_urls``.

        Yields:
            scrapy.Request: One request per movie that has a detail link,
            carrying the partially filled item in ``meta['item']``.
        """
        for data in response.xpath('//*[@id="nowplaying"]/div[2]/ul/li'):
            item = DoubanSpiderItem()
            item['title'] = data.xpath('@data-title').extract_first()
            item['score'] = data.xpath('@data-score').extract_first()

            # extract_first() returns None when the attribute is absent, so
            # guard before calling str methods; otherwise a single incomplete
            # entry would raise and abort the whole listing page.
            actors = data.xpath('@data-actors').extract_first()
            if actors is None:
                item['actors'] = None
            else:
                # Keep only the first two ' /'-separated names.
                item['actors'] = ','.join(actors.split(' /')[:2])

            duration = data.xpath('@data-duration').extract_first()
            if duration is None:
                item['run_time'] = None
            else:
                # Drop the trailing "分钟" ("minutes") unit and what follows.
                item['run_time'] = duration.split('分钟')[0]

            item['review_number'] = data.xpath('@data-votecount').extract_first()
            item['regions'] = data.xpath('@data-region').extract_first()

            url = data.xpath('./ul/li[1]/a/@href').extract_first()
            if not url:
                # scrapy.Request rejects a missing/empty URL; skip this entry
                # instead of losing every remaining movie on the page.
                self.logger.warning(
                    "No detail link found for %r on %s; skipping",
                    item['title'], response.url,
                )
                continue

            # urljoin leaves absolute URLs untouched and resolves relative ones.
            yield scrapy.Request(
                url=response.urljoin(url),
                callback=self.parse_main,
                meta={'item': item},
            )

    def parse_main(self, response):
        """Fill in the detail-page fields of the item and emit it.

        Args:
            response: The movie detail page; ``response.meta['item']`` holds
                the item started in ``parse``.

        Yields:
            The completed item.
        """
        item = response.meta['item']
        item['cover_url'] = response.xpath('//div[@id="mainpic"]/a/img/@src').extract_first()

        release_date = response.xpath(
            '//span[@property="v:initialReleaseDate"]/@content'
        ).extract_first()
        if release_date is None:
            # Not every page is guaranteed to carry a release date; keep the
            # item rather than raising on None.split.
            item['release_date'] = None
        else:
            # Keep only the text before the first "(" (presumably a region
            # suffix such as "2024-01-01(中国大陆)" — verify against the site).
            item['release_date'] = release_date.split('(')[0]

        item['types'] = ','.join(response.xpath('//span[@property="v:genre"]/text()').extract())
        item['content'] = response.xpath('//span[@property="v:summary"]/text()').extract_first()
        yield item